package com.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;
import org.apache.hadoop.mapreduce.lib.map.TokenCounterMapper;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import java.net.URI;

/**
 * Word-count driver that explicitly controls input splitting through
 * {@link NLineInputFormat}.
 *
 * <p>Original author's intent: split the input actively, mainly for files that are not
 * split on their own (compressed archives are the example given).
 * NOTE(review): whether NLineInputFormat yields correct split boundaries for compressed
 * input should be confirmed against the Hadoop version in use.
 *
 * <p>Usage: {@code NlineMapReduce [input [output [linesPerSplit]]]}. Every argument is
 * optional; when omitted, the built-in defaults below are used.
 */
public class NlineMapReduce extends Configured implements Tool {
    /** Input path used when no first argument is supplied. */
    private static final String DEFAULT_INPUT = "hdfs://192.168.10.11:9000/indexdata";
    /** Output path used when no second argument is supplied. */
    private static final String DEFAULT_OUTPUT = "hdfs://192.168.10.11:9000/nlineoutput";
    /** Number of input lines per split used when no third argument is supplied. */
    private static final int DEFAULT_LINES_PER_SPLIT = 2;

    /**
     * Configures and runs the word-count job, blocking until it finishes.
     *
     * @param args optional positional arguments: input path, output path, lines per split
     * @return {@code 0} if the job completed successfully, {@code -1} otherwise
     * @throws IllegalArgumentException if the lines-per-split argument is not a positive integer
     * @throws Exception if file-system access or job submission fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        // Resolve input/output locations, falling back to the defaults.
        Path input = new Path(argOrDefault(args, 0, DEFAULT_INPUT));
        Path output = new Path(argOrDefault(args, 1, DEFAULT_OUTPUT));
        int linesPerSplit = parseLinesPerSplit(args);

        // Look the file system up from the output path itself so the cluster address
        // is not repeated; remove a stale output directory so the job can write there.
        FileSystem fs = output.getFileSystem(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        // Build the job.
        Job job = Job.getInstance(conf);
        job.setJobName("nline");
        job.setJarByClass(this.getClass());

        // Word-count mapper: TokenCounterMapper.
        job.setMapperClass(TokenCounterMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Summing reducer: IntSumReducer.
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Optimization: split the data explicitly. There will be as many map tasks as
        // there are splits, which is how the work gets distributed.
        job.setInputFormatClass(NLineInputFormat.class);
        NLineInputFormat.setNumLinesPerSplit(job, linesPerSplit);
        NLineInputFormat.addInputPath(job, input);

        TextOutputFormat.setOutputPath(job, output);
        return job.waitForCompletion(true) ? 0 : -1;
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args != null && args.length > index && args[index] != null) {
            return args[index];
        }
        return fallback;
    }

    /** Reads the optional third argument as a positive line count per split. */
    private static int parseLinesPerSplit(String[] args) {
        String raw = argOrDefault(args, 2, null);
        if (raw == null) {
            return DEFAULT_LINES_PER_SPLIT;
        }
        int lines;
        try {
            lines = Integer.parseInt(raw.trim());
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException(
                    "linesPerSplit must be an integer, got: " + raw, e);
        }
        if (lines <= 0) {
            throw new IllegalArgumentException(
                    "linesPerSplit must be positive, got: " + lines);
        }
        return lines;
    }

    /** Entry point: runs the tool through {@link ToolRunner} and exits with its status. */
    public static void main(String[] args) throws Exception {
        System.exit(ToolRunner.run(new NlineMapReduce(), args));
    }
}
